library(ggplot2)
library(plotly)
library(ggmap)
library(sf)
library(dplyr)
library(leaflet)
library(htmlwidgets)
library(tmap)
library(sfheaders)
library(geojsonio)
Registered S3 method overwritten by 'geojsonsf':
  method        from   
  print.geojson geojson

Attaching package: ‘geojsonio’

The following object is masked from ‘package:base’:

    pretty
# Path to the NSW localities shapefile
shp_path <- "/Users/LauraWu/Desktop/DATA5002 24T3/DATA5002 Project/GDA94/nsw_localities.shp"

# Load the locality layer with sf
syd <- st_read(dsn = shp_path)
Reading layer `nsw_localities' from data source 
  `/Users/LauraWu/Desktop/DATA5002 24T3/DATA5002 Project/GDA94/nsw_localities.shp' 
  using driver `ESRI Shapefile'
Simple feature collection with 4610 features and 6 fields
Geometry type: POLYGON
Dimension:     XY
Bounding box:  xmin: 140.9993 ymin: -37.50534 xmax: 159.1054 ymax: -28.15702
Geodetic CRS:  GDA94
# View column names and attribute data
# colnames(syd)
# head(syd)

Loading the dataset

# Load the listings summary CSV into a data frame
airbnb <- read.csv(file = "/Users/LauraWu/Desktop/DATA5002 24T3/DATA5002 Project/listings_summary_dec18.csv")

General picture

# Tally the listings for each value of `city`
suburb_listing_counts <- summarise(
  group_by(airbnb, city),
  listings_count = n()
)

# Keep the ten largest tallies, biggest first
top_10_suburbs <- slice_head(
  arrange(suburb_listing_counts, desc(listings_count)),
  n = 10
)

# Preview the result (head() prints the first six rows by default)
head(top_10_suburbs)
# Bar chart of listing counts for the ten suburbs in `top_10_suburbs`, with
# bars ordered by count.
# theme_minimal() is a complete theme and resets every theme element, so it is
# applied first and the axis-label rotation is layered on top of it; in the
# opposite order the rotation is silently discarded.
bar_plot <- ggplot(
  top_10_suburbs,
  aes(x = reorder(city, listings_count), y = listings_count)
) +
  geom_bar(stat = "identity", fill = "steelblue") +
  labs(
    title = "Top 10 the Number of Listings Suburbs",
    x = "Suburb",
    y = "Number of Listings"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Convert ggplot to plotly for interactivity
interactive_bar_plot <- ggplotly(bar_plot)
interactive_bar_plot

# Save the interactive chart as an HTML file
htmlwidgets::saveWidget(interactive_bar_plot, "suburb_listing_bar_chart.html")
# Keep only the listings that have a price recorded
syd_airbnb <- filter(airbnb, !is.na(price))

# Turn the listings into point features built from their longitude/latitude
# columns, tagged as EPSG:4326
airbnb_sf <- st_as_sf(
  x = syd_airbnb,
  coords = c("longitude", "latitude"),
  crs = 4326
)

# One geometry per listing id: union the points that share an id, then take
# the convex hull of each union.
# NOTE(review): if `id` is unique per listing, each hull is just that
# listing's point -- confirm whether this step is needed.
airbnb_polygon <- st_convex_hull(
  summarise(
    group_by(airbnb_sf, id),
    geometry = st_union(geometry)
  )
)

# Open the result in the data viewer
View(airbnb_polygon)
# Bring the listing geometries into the CRS of the suburb layer
airbnb_polygon_tranformed <- st_transform(
  x = airbnb_polygon,
  crs = st_crs(syd)
)

# Attach listing ids to each suburb polygon they intersect
airbnb_polygon_joined <- st_join(
  x = syd,
  y = airbnb_polygon_tranformed,
  join = st_intersects
)

# Discard the rows where no listing id was attached
airbnb_with_suburbs <- filter(airbnb_polygon_joined, !is.na(id))

# Bring the listing attributes (price included) back in by listing id
airbnb_with_suburbs_with_price <- merge(
  x = syd_airbnb,
  y = airbnb_with_suburbs,
  by = "id"
)
# Mean, minimum, maximum and median price for every LOC_NAME, ignoring NAs
price_stats <- summarise(
  group_by(airbnb_with_suburbs_with_price, LOC_NAME),
  avg_price = mean(price, na.rm = TRUE),
  min_price = min(price, na.rm = TRUE),
  max_price = max(price, na.rm = TRUE),
  median_price = median(price, na.rm = TRUE)
)
# Suburb names; each one gets its own trace and its own dropdown entry
suburbs <- unique(airbnb_with_suburbs_with_price$LOC_NAME)

# Empty figure; every suburb contributes a box trace that starts hidden
price_suburbs <- plot_ly()

for (suburb in suburbs) {
  price_suburbs <- add_trace(
    price_suburbs,
    data = filter(airbnb_with_suburbs_with_price, LOC_NAME == suburb),
    x = ~LOC_NAME,
    y = ~price,
    type = "box",
    name = suburb,
    visible = FALSE
  )
}

# The last trace covers all rows and is the one shown on load
price_suburbs <- add_trace(
  price_suburbs,
  data = airbnb_with_suburbs_with_price,
  x = ~LOC_NAME,
  y = ~price,
  type = "box",
  name = "All Suburbs",
  visible = TRUE
)

# Dropdown entries. Each `visible` vector holds one flag per suburb trace plus
# a trailing flag for the "All Suburbs" trace, in the order traces were added.
buttons <- c(
  list(
    list(
      label = "All Suburbs",
      method = "update",
      args = list(
        list(visible = c(rep(FALSE, length(suburbs)), TRUE)),
        list(title = "Price Distribution: All Suburbs")
      )
    )
  ),
  lapply(seq_along(suburbs), function(i) {
    # Show only the i-th suburb trace
    shown <- rep(FALSE, length(suburbs) + 1)
    shown[i] <- TRUE
    list(
      label = suburbs[i],
      method = "update",
      args = list(
        list(visible = shown),
        list(title = paste("Price Distribution:", suburbs[i]))
      )
    )
  })
)

# Titles, axes and the dropdown menu
price_suburbs <- layout(
  price_suburbs,
  title = "Price Distribution Across Suburbs",
  xaxis = list(title = "Suburb", tickangle = 45),
  yaxis = list(title = "Price ($)"),
  updatemenus = list(
    list(
      type = "dropdown",
      x = 0.1, y = 1.2,
      buttons = buttons
    )
  )
)

# Render the figure
price_suburbs
# Average price per suburb; suburbs whose average is NA are dropped
price_avg_suburbs <- airbnb_with_suburbs_with_price %>%
  group_by(LOC_NAME) %>%
  summarise(avg_price = mean(price, na.rm = TRUE)) %>%
  filter(!is.na(avg_price))

# Suburb polygons that have an average price attached
syd_price_avg_suburbs <- syd %>%
  left_join(price_avg_suburbs, by = c("LOC_NAME" = "LOC_NAME")) %>%
  filter(!is.na(avg_price))

# Dropdown entries: suburbs seen in the listings that also have a polygon with
# an average price, so every per-suburb trace has something to draw
suburbs <- intersect(
  unique(airbnb_with_suburbs_with_price$LOC_NAME),
  syd_price_avg_suburbs$LOC_NAME
)

# Add one choropleth trace for the polygons in `suburb_sf`.
#
#   p           plotly figure to extend
#   suburb_sf   sf rows carrying LOC_NAME and avg_price columns
#   trace_name  name given to the trace
#   visible     whether the trace is shown initially
#
# Returns the figure with the extra trace.
#
# The data frame must go through `data =`: add_trace()'s signature is
# add_trace(p, ..., data = NULL), so a positional data frame lands in `...`
# and the ~LOC_NAME / ~avg_price formulas have nothing to be evaluated
# against ("object 'LOC_NAME' not found").
# plotly's built-in choropleth location modes only cover countries and US
# states, so the suburb outlines are supplied as GeoJSON (in EPSG:4326) and
# matched to `locations` through the LOC_NAME feature property.
add_price_choropleth <- function(p, suburb_sf, trace_name, visible) {
  add_trace(
    p,
    data = st_drop_geometry(suburb_sf),
    type = "choropleth",
    geojson = geojsonio::geojson_list(st_transform(suburb_sf, crs = 4326)),
    featureidkey = "properties.LOC_NAME",
    locationmode = "geojson-id",
    locations = ~LOC_NAME,
    z = ~avg_price,
    hoverinfo = "location+z",
    color = ~avg_price,
    colorscale = "YlOrRd",
    name = trace_name,
    visible = visible,
    colorbar = list(title = "Avg Price", ticks = "outside")
  )
}

# One hidden trace per suburb, each restricted to that suburb's rows so the
# dropdown really filters the map
price_suburbs_map <- plot_ly()

for (suburb in suburbs) {
  price_suburbs_map <- add_price_choropleth(
    price_suburbs_map,
    filter(syd_price_avg_suburbs, LOC_NAME == suburb),
    trace_name = suburb,
    visible = FALSE
  )
}

# Default trace with every suburb, shown on load
price_suburbs_map <- add_price_choropleth(
  price_suburbs_map,
  syd_price_avg_suburbs,
  trace_name = "All Suburbs",
  visible = TRUE
)

# Dropdown entries. Each `visible` vector holds one flag per suburb trace plus
# a trailing flag for the "All Suburbs" trace, in the order traces were added.
buttons <- c(
  list(
    list(
      label = "All Suburbs",
      method = "update",
      args = list(
        list(visible = c(rep(FALSE, length(suburbs)), TRUE)),
        list(title = "Price Distribution: All Suburbs")
      )
    )
  ),
  lapply(seq_along(suburbs), function(i) {
    # Show only the i-th suburb trace
    shown <- rep(FALSE, length(suburbs) + 1)
    shown[i] <- TRUE
    list(
      label = suburbs[i],
      method = "update",
      args = list(
        list(visible = shown),
        list(title = paste("Price Distribution:", suburbs[i]))
      )
    )
  })
)

# Map appearance and dropdown menu; fitbounds zooms the view to the polygons
# of the visible trace instead of showing the whole world
price_suburbs_map <- price_suburbs_map %>%
  layout(
    geo = list(
      showlakes = TRUE,
      lakecolor = "white",
      projection = list(type = "mercator"),
      scope = "world",
      fitbounds = "locations",
      visible = TRUE
    ),
    title = "Price Distribution Across Suburbs",
    updatemenus = list(
      list(
        type = "dropdown",
        x = 0.1, y = 1.2,
        buttons = buttons
      )
    )
  )

# Display the map
price_suburbs_map
---
title: "R Notebook"
output: html_notebook
---
 


```{r}
library(ggplot2)
library(plotly)
library(ggmap)
library(sf)
library(dplyr)
library(leaflet)
library(htmlwidgets)
library(tmap)
library(sfheaders)
library(geojsonio)
```


```{r}
# Path to the NSW localities shapefile
shp_path <- "/Users/LauraWu/Desktop/DATA5002 24T3/DATA5002 Project/GDA94/nsw_localities.shp"

# Load the locality layer with sf
syd <- st_read(dsn = shp_path)

# View column names and attribute data
# colnames(syd)
# head(syd)
```

**Loading the dataset**
```{r}
# Load the listings summary CSV into a data frame
airbnb <- read.csv(file = "/Users/LauraWu/Desktop/DATA5002 24T3/DATA5002 Project/listings_summary_dec18.csv")
```


**General picture**

- Top 10 suburbs by number of listings
```{r}
# Tally the listings for each value of `city`
suburb_listing_counts <- summarise(
  group_by(airbnb, city),
  listings_count = n()
)

# Keep the ten largest tallies, biggest first
top_10_suburbs <- slice_head(
  arrange(suburb_listing_counts, desc(listings_count)),
  n = 10
)

# Preview the result (head() prints the first six rows by default)
head(top_10_suburbs)
```
```{r}
# Bar chart of listing counts for the ten suburbs in `top_10_suburbs`, with
# bars ordered by count.
# theme_minimal() is a complete theme and resets every theme element, so it is
# applied first and the axis-label rotation is layered on top of it; in the
# opposite order the rotation is silently discarded.
bar_plot <- ggplot(
  top_10_suburbs,
  aes(x = reorder(city, listings_count), y = listings_count)
) +
  geom_bar(stat = "identity", fill = "steelblue") +
  labs(
    title = "Top 10 the Number of Listings Suburbs",
    x = "Suburb",
    y = "Number of Listings"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Convert ggplot to plotly for interactivity
interactive_bar_plot <- ggplotly(bar_plot)
interactive_bar_plot

# Save the interactive chart as an HTML file
htmlwidgets::saveWidget(interactive_bar_plot, "suburb_listing_bar_chart.html")

```


```{r}

```

```{r}
# Keep only the listings that have a price recorded
syd_airbnb <- filter(airbnb, !is.na(price))

# Turn the listings into point features built from their longitude/latitude
# columns, tagged as EPSG:4326
airbnb_sf <- st_as_sf(
  x = syd_airbnb,
  coords = c("longitude", "latitude"),
  crs = 4326
)
```

```{r}
# One geometry per listing id: union the points that share an id, then take
# the convex hull of each union.
# NOTE(review): if `id` is unique per listing, each hull is just that
# listing's point -- confirm whether this step is needed.
airbnb_polygon <- st_convex_hull(
  summarise(
    group_by(airbnb_sf, id),
    geometry = st_union(geometry)
  )
)

```

```{r}
# Bring the listing geometries into the CRS of the suburb layer
airbnb_polygon_tranformed <- st_transform(
  x = airbnb_polygon,
  crs = st_crs(syd)
)

# Attach listing ids to each suburb polygon they intersect
airbnb_polygon_joined <- st_join(
  x = syd,
  y = airbnb_polygon_tranformed,
  join = st_intersects
)

# Discard the rows where no listing id was attached
airbnb_with_suburbs <- filter(airbnb_polygon_joined, !is.na(id))

# Bring the listing attributes (price included) back in by listing id
airbnb_with_suburbs_with_price <- merge(
  x = syd_airbnb,
  y = airbnb_with_suburbs,
  by = "id"
)
```

```{r}
# Mean, minimum, maximum and median price for every LOC_NAME, ignoring NAs
price_stats <- summarise(
  group_by(airbnb_with_suburbs_with_price, LOC_NAME),
  avg_price = mean(price, na.rm = TRUE),
  min_price = min(price, na.rm = TRUE),
  max_price = max(price, na.rm = TRUE),
  median_price = median(price, na.rm = TRUE)
)
```

```{r}
# Suburb names; each one gets its own trace and its own dropdown entry
suburbs <- unique(airbnb_with_suburbs_with_price$LOC_NAME)

# Empty figure; every suburb contributes a box trace that starts hidden
price_suburbs <- plot_ly()

for (suburb in suburbs) {
  price_suburbs <- add_trace(
    price_suburbs,
    data = filter(airbnb_with_suburbs_with_price, LOC_NAME == suburb),
    x = ~LOC_NAME,
    y = ~price,
    type = "box",
    name = suburb,
    visible = FALSE
  )
}

# The last trace covers all rows and is the one shown on load
price_suburbs <- add_trace(
  price_suburbs,
  data = airbnb_with_suburbs_with_price,
  x = ~LOC_NAME,
  y = ~price,
  type = "box",
  name = "All Suburbs",
  visible = TRUE
)

# Dropdown entries. Each `visible` vector holds one flag per suburb trace plus
# a trailing flag for the "All Suburbs" trace, in the order traces were added.
buttons <- c(
  list(
    list(
      label = "All Suburbs",
      method = "update",
      args = list(
        list(visible = c(rep(FALSE, length(suburbs)), TRUE)),
        list(title = "Price Distribution: All Suburbs")
      )
    )
  ),
  lapply(seq_along(suburbs), function(i) {
    # Show only the i-th suburb trace
    shown <- rep(FALSE, length(suburbs) + 1)
    shown[i] <- TRUE
    list(
      label = suburbs[i],
      method = "update",
      args = list(
        list(visible = shown),
        list(title = paste("Price Distribution:", suburbs[i]))
      )
    )
  })
)

# Titles, axes and the dropdown menu
price_suburbs <- layout(
  price_suburbs,
  title = "Price Distribution Across Suburbs",
  xaxis = list(title = "Suburb", tickangle = 45),
  yaxis = list(title = "Price ($)"),
  updatemenus = list(
    list(
      type = "dropdown",
      x = 0.1, y = 1.2,
      buttons = buttons
    )
  )
)

# Render the figure
price_suburbs
```


```{r}
# Average price per suburb; suburbs whose average is NA are dropped
price_avg_suburbs <- airbnb_with_suburbs_with_price %>%
  group_by(LOC_NAME) %>%
  summarise(avg_price = mean(price, na.rm = TRUE)) %>%
  filter(!is.na(avg_price))

# Suburb polygons that have an average price attached
syd_price_avg_suburbs <- syd %>%
  left_join(price_avg_suburbs, by = c("LOC_NAME" = "LOC_NAME")) %>%
  filter(!is.na(avg_price))

# Dropdown entries: suburbs seen in the listings that also have a polygon with
# an average price, so every per-suburb trace has something to draw
suburbs <- intersect(
  unique(airbnb_with_suburbs_with_price$LOC_NAME),
  syd_price_avg_suburbs$LOC_NAME
)

# Add one choropleth trace for the polygons in `suburb_sf`.
#
#   p           plotly figure to extend
#   suburb_sf   sf rows carrying LOC_NAME and avg_price columns
#   trace_name  name given to the trace
#   visible     whether the trace is shown initially
#
# Returns the figure with the extra trace.
#
# The data frame must go through `data =`: add_trace()'s signature is
# add_trace(p, ..., data = NULL), so a positional data frame lands in `...`
# and the ~LOC_NAME / ~avg_price formulas have nothing to be evaluated
# against ("object 'LOC_NAME' not found").
# plotly's built-in choropleth location modes only cover countries and US
# states, so the suburb outlines are supplied as GeoJSON (in EPSG:4326) and
# matched to `locations` through the LOC_NAME feature property.
add_price_choropleth <- function(p, suburb_sf, trace_name, visible) {
  add_trace(
    p,
    data = st_drop_geometry(suburb_sf),
    type = "choropleth",
    geojson = geojsonio::geojson_list(st_transform(suburb_sf, crs = 4326)),
    featureidkey = "properties.LOC_NAME",
    locationmode = "geojson-id",
    locations = ~LOC_NAME,
    z = ~avg_price,
    hoverinfo = "location+z",
    color = ~avg_price,
    colorscale = "YlOrRd",
    name = trace_name,
    visible = visible,
    colorbar = list(title = "Avg Price", ticks = "outside")
  )
}

# One hidden trace per suburb, each restricted to that suburb's rows so the
# dropdown really filters the map
price_suburbs_map <- plot_ly()

for (suburb in suburbs) {
  price_suburbs_map <- add_price_choropleth(
    price_suburbs_map,
    filter(syd_price_avg_suburbs, LOC_NAME == suburb),
    trace_name = suburb,
    visible = FALSE
  )
}

# Default trace with every suburb, shown on load
price_suburbs_map <- add_price_choropleth(
  price_suburbs_map,
  syd_price_avg_suburbs,
  trace_name = "All Suburbs",
  visible = TRUE
)

# Dropdown entries. Each `visible` vector holds one flag per suburb trace plus
# a trailing flag for the "All Suburbs" trace, in the order traces were added.
buttons <- c(
  list(
    list(
      label = "All Suburbs",
      method = "update",
      args = list(
        list(visible = c(rep(FALSE, length(suburbs)), TRUE)),
        list(title = "Price Distribution: All Suburbs")
      )
    )
  ),
  lapply(seq_along(suburbs), function(i) {
    # Show only the i-th suburb trace
    shown <- rep(FALSE, length(suburbs) + 1)
    shown[i] <- TRUE
    list(
      label = suburbs[i],
      method = "update",
      args = list(
        list(visible = shown),
        list(title = paste("Price Distribution:", suburbs[i]))
      )
    )
  })
)

# Map appearance and dropdown menu; fitbounds zooms the view to the polygons
# of the visible trace instead of showing the whole world
price_suburbs_map <- price_suburbs_map %>%
  layout(
    geo = list(
      showlakes = TRUE,
      lakecolor = "white",
      projection = list(type = "mercator"),
      scope = "world",
      fitbounds = "locations",
      visible = TRUE
    ),
    title = "Price Distribution Across Suburbs",
    updatemenus = list(
      list(
        type = "dropdown",
        x = 0.1, y = 1.2,
        buttons = buttons
      )
    )
  )

# Display the map
price_suburbs_map
```














